explore
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(purrr)
library(xml2)
library(ggplot2)
library(shiny)################################################################################
# extract variables in the IRS990ScheduleD node from the given file
################################################################################
# output is a dataframe with the filename and name of each variable extracted
collect_sched_d_vars <- function(input_file) {
example_file <- xml2::read_xml(input_file)%>% xml_ns_strip()
# look at all variable names in schedule D
IRS990ScheduleD_vars <- example_file %>%
xml_find_all("//ReturnData//IRS990ScheduleD") %>%
xml_contents() %>%
xml_path()
# look at variable names of children nodes of the schedule D nodes
IRS990ScheduleD_var_children <- IRS990ScheduleD_vars %>%
map(~xml_find_all(example_file,
xpath = gsub("/",
"//" ,
.x,
fixed = TRUE) ) %>%
xml_children() %>%
xml_path()) %>%
unlist()
# include both first level schedule D nodes and children nodes
vars <- c(IRS990ScheduleD_vars, IRS990ScheduleD_var_children)
tibble(variables = vars,
filename = input_file)
}
# file paths in the directory
files <- dir( "/Users/niezhen/Desktop/Spring 2023/SDS 410/ballet_990_released_20230208",
full.names = TRUE)
# iterate over all files and extract variables present
# output is a data frame with a file name column and variable name column
all_vars <- map_df(files, collect_sched_d_vars)
all_vars# after seeing the final table in the data_dictionary counting each variables's occurence, I think it can be nice to add more variables to the endowment data. Liza also mentioned that we can try to trace building endowment change over time, so I think I can try to add BuidingGrp variable into the endowment_data
# Find all variable names that include building information
all_vars <- map_df(files, collect_sched_d_vars)
to_add <- c()
for(row in 1:nrow(all_vars)){
if(grepl("OtherLandBuildingsGrp", all_vars[row, ]$variables) | grepl("BuildingsGrp", all_vars[row, ]$variables)){
to_add <- append(to_add, all_vars[row, ]$variables)
}
}
to_add<-unique(to_add)
to_add## [1] "/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp"
## [2] "/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/OtherCostOrOtherBasisAmt"
## [3] "/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/DepreciationAmt"
## [4] "/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/BookValueAmt"
## [5] "/Return/ReturnData/IRS990ScheduleD/BuildingsGrp"
## [6] "/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/OtherCostOrOtherBasisAmt"
## [7] "/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/DepreciationAmt"
## [8] "/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/BookValueAmt"
## [9] "/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/InvestmentCostOrOtherBasisAmt"
## [10] "/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/InvestmentCostOrOtherBasisAmt"
Buildings Grouop Explanation from https://www.irs.gov/pub/irs-pdf/i990sd.pdf
Part VI. Land, Buildings, and Equipment Complete Part VI if the organization answered “Yes” on Form 990, Part IV, line 11a, and reported an amount on Form 990, Part X, line 10a. Reporting is required if any amount other than zero is reported on those lines.
Column (a). Enter the cost or other basis of all land, buildings, leasehold improvements, equipment, and other fixed assets held for investment purposes, such as rental properties.
Column (b). Enter the cost or other basis of all other land, buildings, leasehold improvements, equipment, and other fixed assets held for other than investment purposes, including any land, buildings, and equipment owned and used by the organization in conducting its exempt activities. The total amounts reported in columns (a) and (b) must equal the amount reported on Form 990, Part X, line 10a.
Column (c). Enter the accumulated depreciation recorded for the assets listed in columns (a) and (b). Don’t enter an amount in column (c) for line 1a, Land. The total of column (c) must equal the amount reported on Form 990, Part X, line 10b.
Column (d). Enter the sum of column (a) and column (b) minus column (c).
##From Rose's get_endowment function
get_endowment <- function(filename) {
# Retrieving the same endowment information for all
variables <- c("//Return//ReturnHeader//ReturnTs",
"//Return//ReturnHeader//Filer//EIN",
"//Return//ReturnData//IRS990//DonorRstrOrQuasiEndowmentsInd",
"//Return//ReturnData//IRS990ScheduleD//CYEndwmtFundGrp//BeginningYearBalanceAmt",
"//Return//ReturnData//IRS990ScheduleD//CYEndwmtFundGrp//ContributionsAmt",
"//Return//ReturnData//IRS990ScheduleD//CYEndwmtFundGrp//InvestmentEarningsOrLossesAmt",
"//Return//ReturnData//IRS990ScheduleD//CYEndwmtFundGrp//OtherExpendituresAmt",
"//Return//ReturnData//IRS990ScheduleD//CYEndwmtFundGrp//EndYearBalanceAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus1YrEndwmtFundGrp//BeginningYearBalanceAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus1YrEndwmtFundGrp//ContributionsAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus1YrEndwmtFundGrp//InvestmentEarningsOrLossesAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus1YrEndwmtFundGrp//OtherExpendituresAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus1YrEndwmtFundGrp//EndYearBalanceAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus2YrEndwmtFundGrp//BeginningYearBalanceAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus2YrEndwmtFundGrp//ContributionsAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus2YrEndwmtFundGrp//InvestmentEarningsOrLossesAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus2YrEndwmtFundGrp//OtherExpendituresAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus2YrEndwmtFundGrp//EndYearBalanceAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus3YrEndwmtFundGrp//BeginningYearBalanceAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus3YrEndwmtFundGrp//ContributionsAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus3YrEndwmtFundGrp//InvestmentEarningsOrLossesAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus3YrEndwmtFundGrp//OtherExpendituresAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus3YrEndwmtFundGrp//EndYearBalanceAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus4YrEndwmtFundGrp//BeginningYearBalanceAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus4YrEndwmtFundGrp//ContributionsAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus4YrEndwmtFundGrp//InvestmentEarningsOrLossesAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus4YrEndwmtFundGrp//OtherExpendituresAmt",
"//Return//ReturnData//IRS990ScheduleD//CYMinus4YrEndwmtFundGrp//EndYearBalanceAmt",
"//Return//ReturnData//IRS990ScheduleD//BoardDesignatedBalanceEOYPct",
"//Return//ReturnData//IRS990ScheduleD//PrmnntEndowmentBalanceEOYPct",
"//Return//ReturnData//IRS990ScheduleD//TermEndowmentBalanceEOYPct",
"//Return//ReturnData//IRS990ScheduleD//EndowmentsHeldUnrelatedOrgInd",
"//Return//ReturnData//IRS990ScheduleD//EndowmentsHeldRelatedOrgInd",
# adding BuildingGrp and OtherLandBuildingsGrp
"/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/OtherCostOrOtherBasisAmt",
"/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/DepreciationAmt",
"/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/BookValueAmt",
"/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/OtherCostOrOtherBasisAmt",
"/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/DepreciationAmt",
"/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/BookValueAmt",
"/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/InvestmentCostOrOtherBasisAmt",
"/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/InvestmentCostOrOtherBasisAmt",
"//AmendedReturnInd",
"//Return//ReturnHeader//ReturnTypeCd"
)
# Column name; order matters, needs to align with retrieval order
variables_no_path <- c("ReturnTs",
"EIN",
"DonorRstrOrQuasiEndowmentsInd",
"CYBeginningYearBalanceAmt",
"CYContributionsAmt",
"CYInvestmentEarningsOrLossesAmt",
"CYOtherExpendituresAmt",
"CYEndYearBalanceAmt",
"CYM1BeginningYearBalanceAmt",
"CYM1ContributionsAmt",
"CYM1InvestmentEarningsOrLossesAmt",
"CYM1OtherExpendituresAmt",
"CYM1EndYearBalanceAmt",
"CYM2BeginningYearBalanceAmt",
"CYM2ContributionsAmt",
"CYM2InvestmentEarningsOrLossesAmt",
"CYM2OtherExpendituresAmt",
"CYM2EndYearBalanceAmt",
"CYM3BeginningYearBalanceAmt",
"CYM3ContributionsAmt",
"CYM3InvestmentEarningsOrLossesAmt",
"CYM3OtherExpendituresAmt",
"CYM3EndYearBalanceAmt",
"CYM4BeginningYearBalanceAmt",
"CYM4ContributionsAmt",
"CYM4InvestmentEarningsOrLossesAmt",
"CYM4OtherExpendituresAmt",
"CYM4EndYearBalanceAmt",
"BoardDesignatedBalanceEOYPct",
"PrmnntEndowmentBalanceEOYPct",
"TermEndowmentBalanceEOYPct",
"EndowmentsHeldUnrelatedOrgInd",
"EndowmentsHeldRelatedOrgInd",
"OtherLandBuildingsGrp_OtherCostOrOtherBasisAmt",
"OtherLandBuildingsGrp_DepreciationAmt",
"OtherLandBuildingsGrp_BookValueAmt",
"BuildingsGrp_OtherCostOrOtherBasisAmt",
"BuildingsGrp_DepreciationAmt",
"BuildingsGrp_BookValueAmt",
"OtherLandBuildingsGrp_InvestmentCostOrOtherBasisAmt",
"BuildingsGrp_InvestmentCostOrOtherBasisAmt",
"AmendedReturnInd",
"ReturnTypeCd"
)
xml_file <- read_xml(filename)
xml_file <- xml_ns_strip(xml_file)
# extract each variable; if it isn't present, put NA
extracted <- map(variables, ~{
value <- xml_find_all(
xml_file,
xpath =.x)
value <- ifelse(length(value) ==0,
NA,
xml_text(value)) })
names(extracted) <- variables_no_path
extracted <- extracted %>%
as_tibble()
}##Applying get_endowment to entire output
files <- dir( "/Users/niezhen/Desktop/Spring 2023/SDS 410/ballet_990_released_20230208",
full.names = TRUE)
endowment_data_building <- map_df(files, ~
get_endowment(.x))
all_data<-readRDS("./data/data_990.RDS")
filter_data <- all_data %>%
filter(ReturnTypeCd == "990" | ReturnTypeCd == "990EZ") %>%
mutate(amended_return_filled = ifelse(!is.na(AmendedReturnInd),1, 0)) %>%
group_by(EIN, fiscal_year) %>%
slice_max(order_by =amended_return_filled, n = 1) %>%
ungroup()
##Retriving Ts and EINs for filtered data
filter_ids <- filter_data %>%
select(ReturnTs, EIN)
## Adjusting data type, filtering to proper 990s
endowment_data_building <- endowment_data_building %>%
mutate(ReturnDate = as.Date(ReturnTs,
format = "%Y-%m-%d")) %>%
mutate(across(CYBeginningYearBalanceAmt:TermEndowmentBalanceEOYPct,
as.numeric)) %>%
mutate(across(c(EndowmentsHeldRelatedOrgInd, EndowmentsHeldUnrelatedOrgInd, DonorRstrOrQuasiEndowmentsInd),
~ifelse(.x == "true" | .x == "1", TRUE, FALSE))) %>%
filter(ReturnTypeCd == "990" | ReturnTypeCd == "990EZ") %>%
right_join(filter_ids, by = c("ReturnTs", "EIN")) %>%
select(-c(ReturnTypeCd,AmendedReturnInd)) #Removing columns needed for filtering
saveRDS(endowment_data_building, "./data/endowment_filtered_buildings.RDS")Preliminary Analysis of PartIV Data
Term explained: Book value is an accounting term used for both a measure of a business’s equity and the value of an asset as it appears on a balance sheet. As time goes on, the cost stays the same, but the accumulated depreciation increases, so the book value decreases.
# filter out all data each year
data_each_year<-list(
data_2015 <- filter(endowment_data_building, grepl("2015", ReturnTs)),
data_2016 <- filter(endowment_data_building, grepl("2016", ReturnTs)),
data_2017 <- filter(endowment_data_building, grepl("2017", ReturnTs)),
data_2018 <- filter(endowment_data_building, grepl("2018", ReturnTs)),
data_2019 <- filter(endowment_data_building, grepl("2019", ReturnTs)),
data_2020 <- filter(endowment_data_building, grepl("2020", ReturnTs)),
data_2021 <- filter(endowment_data_building, grepl("2021", ReturnTs)),
data_2022 <- filter(endowment_data_building, grepl("2022", ReturnTs))
)BookValueSum <- list()
# restore each year's sum for each EIN's buildings bookvalue
for(i in c(1:8)){
data = data_each_year[[i]]
df <- data.frame(data %>%
filter(!is.na(BuildingsGrp_BookValueAmt)) %>%
mutate_at(c("BuildingsGrp_BookValueAmt", "BuildingsGrp_DepreciationAmt"), as.numeric) %>%
group_by(EIN) %>%
summarise(
BookValueSum = sum(BuildingsGrp_BookValueAmt)
) %>%
mutate(year = 2014+i,
EIN = as.numeric(EIN)))
print(df)
BookValueSum[[i]] = df
}## EIN BookValueSum year
## 1 10372997 91566 2015
## 2 131882106 139511 2015
## 3 581047778 7809407 2015
## 4 581440788 7625797 2015
## 5 621018942 1879384 2015
## 6 640732185 322271 2015
## EIN BookValueSum year
## 1 10420070 1256986 2016
## 2 42312734 2881261 2016
## 3 50377245 200813 2016
## 4 131882106 74507 2016
## 5 132584273 24421710 2016
## 6 132642091 4899303 2016
## 7 132685755 9749028 2016
## 8 133307859 71215 2016
## 9 161328541 1140339 2016
## 10 210732575 1191342 2016
## 11 231629970 5825676 2016
## 12 237161084 184378 2016
## 13 237247009 241299 2016
## 14 310858562 5859803 2016
## 15 341645238 0 2016
## 16 364009741 17962769 2016
## 17 382026127 8009127 2016
## 18 391134735 283053 2016
## 19 455395602 0 2016
## 20 510394850 40525 2016
## 21 520846173 2996016 2016
## 22 581314711 6537763 2016
## 23 581420599 883662 2016
## 24 581440788 7549349 2016
## 25 630505056 800205 2016
## 26 640732185 300367 2016
## 27 730667485 7436920 2016
## 28 741394920 34192516 2016
## 29 746060386 4684505 2016
## 30 841150857 124075 2016
## 31 846038137 5747870 2016
## 32 910897129 0 2016
## 33 930765746 735820 2016
## 34 931009305 2018914 2016
## 35 941415298 17745677 2016
## 36 942427112 3202747 2016
## EIN BookValueSum year
## 1 10420070 1208788 2017
## 2 42312734 3077240 2017
## 3 50377245 168954 2017
## 4 131882106 9502 2017
## 5 132584273 23583806 2017
## 6 132642091 4632021 2017
## 7 132685755 9551656 2017
## 8 133307859 -43464 2017
## 9 231629970 5693817 2017
## 10 237161084 163240 2017
## 11 237247009 233154 2017
## 12 260868264 0 2017
## 13 341645238 0 2017
## 14 364009741 17426567 2017
## 15 382026127 3697938 2017
## 16 391134735 276806 2017
## 17 520846173 2852613 2017
## 18 581047778 7578228 2017
## 19 581314711 6258733 2017
## 20 581420599 850744 2017
## 21 581440788 7275576 2017
## 22 581891235 293053 2017
## 23 621018942 1784670 2017
## 24 630505056 792478 2017
## 25 630813626 1267425 2017
## 26 640732185 281411 2017
## 27 730667485 11213840 2017
## 28 741394920 33242880 2017
## 29 746060386 4504252 2017
## 30 841150857 91874 2017
## 31 846038137 5627827 2017
## 32 910897129 0 2017
## 33 930765746 713164 2017
## 34 931009305 88534 2017
## 35 941415298 17232095 2017
## 36 942427112 3079701 2017
## EIN BookValueSum year
## 1 10420070 1165283 2018
## 2 42312734 2841510 2018
## 3 50377245 163161 2018
## 4 131882106 0 2018
## 5 132584273 22743696 2018
## 6 132642091 4438155 2018
## 7 132685755 16346203 2018
## 8 133307859 -43464 2018
## 9 210732575 590236 2018
## 10 231629970 5560499 2018
## 11 237161084 734364 2018
## 12 237247009 225008 2018
## 13 260868264 0 2018
## 14 310858562 2054607 2018
## 15 341645238 45230 2018
## 16 364009741 16890365 2018
## 17 382026127 3490976 2018
## 18 391134735 257838 2018
## 19 436052680 23269913 2018
## 20 455395602 0 2018
## 21 520846173 2720036 2018
## 22 546049848 2979502 2018
## 23 581047778 14597136 2018
## 24 581314711 5979880 2018
## 25 581420599 817826 2018
## 26 581440788 6879884 2018
## 27 621018942 1690366 2018
## 28 630505056 339933 2018
## 29 630813626 1225063 2018
## 30 640732185 266738 2018
## 31 730667485 23021561 2018
## 32 741394920 32190211 2018
## 33 746060386 4298185 2018
## 34 841150857 74547 2018
## 35 846038137 5475088 2018
## 36 910897129 0 2018
## 37 930765746 690508 2018
## 38 941415298 16367527 2018
## 39 942427112 2950704 2018
## EIN BookValueSum year
## 1 10420070 1099749 2019
## 2 42312734 6904875 2019
## 3 50377245 150609 2019
## 4 131882106 0 2019
## 5 132584273 44797058 2019
## 6 132642091 4751256 2019
## 7 133307859 -43464 2019
## 8 210732575 551524 2019
## 9 231629970 5791687 2019
## 10 237161084 734364 2019
## 11 237247009 216863 2019
## 12 310858562 3892306 2019
## 13 311784286 327 2019
## 14 341645238 42552 2019
## 15 364009741 16354163 2019
## 16 382026127 3297989 2019
## 17 391134735 390192 2019
## 18 436052680 22742528 2019
## 19 454864757 0 2019
## 20 455395602 0 2019
## 21 520846173 2569933 2019
## 22 546049848 4630690 2019
## 23 581314711 5705452 2019
## 24 581420599 785970 2019
## 25 581440788 6635595 2019
## 26 581891235 131159 2019
## 27 621018942 14865433 2019
## 28 630813626 2320424 2019
## 29 640732185 245008 2019
## 30 741394920 31209056 2019
## 31 746060386 4071036 2019
## 32 841150857 68477 2019
## 33 846038137 5333660 2019
## 34 910897129 0 2019
## 35 930765746 667851 2019
## 36 941415298 15338838 2019
## 37 942427112 2819259 2019
## EIN BookValueSum year
## 1 10420070 1034653 2020
## 2 42312734 6605314 2020
## 3 50377245 376526 2020
## 4 131882106 0 2020
## 5 132584273 43561723 2020
## 6 132642091 5052565 2020
## 7 132685755 6895294 2020
## 8 133307859 -43464 2020
## 9 210732575 530797 2020
## 10 231629970 6530935 2020
## 11 237003520 3222367 2020
## 12 237161084 734364 2020
## 13 237247009 478976 2020
## 14 237424849 0 2020
## 15 364009741 15817960 2020
## 16 436052680 22215144 2020
## 17 454864757 0 2020
## 18 455395602 0 2020
## 19 520846173 2504039 2020
## 20 541244590 0 2020
## 21 542124670 0 2020
## 22 546049848 8865577 2020
## 23 581047778 7266609 2020
## 24 581314711 5426052 2020
## 25 581420599 753491 2020
## 26 581440788 6391306 2020
## 27 581891235 125012 2020
## 28 616033779 969035 2020
## 29 621018942 28353167 2020
## 30 630813626 1072333 2020
## 31 640732185 223496 2020
## 32 730667485 10772985 2020
## 33 741394920 30201871 2020
## 34 746060386 3896678 2020
## 35 841150857 63367 2020
## 36 846038137 5207587 2020
## 37 910897129 0 2020
## 38 930765746 1790522 2020
## 39 941415298 14305027 2020
## 40 942427112 2704888 2020
## EIN BookValueSum year
## 1 10420070 971576 2021
## 2 42312734 6066501 2021
## 3 50377245 125508 2021
## 4 131882106 0 2021
## 5 132584273 42044504 2021
## 6 132642091 5101064 2021
## 7 132685755 6653368 2021
## 8 133307859 -43464 2021
## 9 210732575 507717 2021
## 10 231629970 6422464 2021
## 11 237003520 3222367 2021
## 12 237161084 734364 2021
## 13 237247009 225913 2021
## 14 310858562 1837699 2021
## 15 311784286 0 2021
## 16 364009741 15281758 2021
## 17 382026127 3088395 2021
## 18 454864757 0 2021
## 19 461513558 0 2021
## 20 520846173 1298710 2021
## 21 542124670 0 2021
## 22 581047778 7037437 2021
## 23 581314711 5154185 2021
## 24 581891235 118865 2021
## 25 616033779 995302 2021
## 26 730667485 10324836 2021
## 27 741394920 29194686 2021
## 28 746060386 3648651 2021
## 29 841150857 53789 2021
## 30 841622654 1644891 2021
## 31 846038137 5047657 2021
## 32 910897129 0 2021
## 33 941415298 13212979 2021
## 34 942427112 5011759 2021
## 35 943197247 3246223 2021
## EIN BookValueSum year
## 1 132685755 6411442 2022
## 2 930765746 1102719 2022
# convert the list into dataframe
bookvalues <- do.call(rbind, BookValueSum)I notice that bookvalue of 131882106 turned to 0 in 2018
# visualization of overall book value
ggplot(bookvalues, aes(x = year, y = BookValueSum, color = as.factor(EIN))) +
geom_line()+
theme_bw()+
theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
axis.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
axis.text.x = element_text(size = 10),
strip.text = element_text(face="bold",size = 5)) +
labs(y = "Schedule D Sum of Buildings Book Value",
x = "Year",
title = "Schedule D Sum of Buildings Book Value Change",
subtitle = "By Fiscal Year")# separate EIN with medium and low building book value according to previous graph
# high
bookvalues %>%
filter(BookValueSum >= 1e+07) %>%
ggplot(aes(x = year, y = BookValueSum, color = as.factor(EIN))) +
geom_line()+
theme_bw()+
theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
axis.title = element_text(size = 10, face = "bold"),
plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
axis.text.x = element_text(size = 10),
strip.text = element_text(face="bold",size = 5)) +
labs(y = "Schedule D Sum of Buildings Book Value",
x = "Year",
title = "Schedule D Sum of Buildings Book Value Change (High)",
subtitle = "By Fiscal Year")# medium
bookvalues %>%
filter(BookValueSum < 1e+07 & BookValueSum > 2.5e+06) %>%
ggplot(aes(x = year, y = BookValueSum, color = as.factor(EIN))) +
geom_line()+
theme_bw() +
theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
axis.title = element_text(size = 10, face = "bold"),
plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
axis.text.x = element_text(size = 10),
strip.text = element_text(face="bold",size = 5)) +
labs(y = "Schedule D Sum of Buildings Book Value",
x = "Year",
title = "Schedule D Sum of Buildings Book Value Change (Medium)",
subtitle = "By Fiscal Year")# low
bookvalues %>%
filter(BookValueSum < 2.5e+06) %>%
ggplot(aes(x = year, y = BookValueSum, color = as.factor(EIN))) +
geom_line()+
theme_bw() +
theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
axis.title = element_text(size = 10, face = "bold"),
plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
axis.text.x = element_text(size = 10),
strip.text = element_text(face="bold",size = 5)) +
labs(y = "Schedule D Sum of Buildings Book Value",
x = "Year",
title = "Schedule D Sum of Buildings Book Value Change (Low)",
subtitle = "By Fiscal Year")
The book value will decrease because of depreciation, so the normal
trend of an EIN from previous visualization should be a slight downward
line. I will filter out EIN with a bizarre trend that should be : 1)
Abruptly going down 2) going up at any point 3) appear less than 3
years
(Currently I’m just doing this by eyeballing but I will write code for it later)
Caveat: ways of splitting up split up by quantile add geom_point
#dataframe with higher Book Values
high <- bookvalues %>%
filter(BookValueSum > 1e+07)weird EIN: 132584273, 621018942
medium <-bookvalues %>%
filter(BookValueSum < 1e+07 & BookValueSum > 2.5e+06)weird EIN: 546049868
low <- bookvalues %>%
filter(BookValueSum < 1e+07) 10372997